package com.hylanda.entity.keyword;

import java.io.File;
import java.io.IOException;
import java.util.*;

import com.hylanda.segmentor.common.SegGrain;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.hylanda.segmentor.BasicSegmentor;
import com.hylanda.segmentor.common.SegOption;
import com.hylanda.segmentor.common.SegResult;
import com.hylanda.segmentor.common.Token;
import com.hylanda.tools.CommonTools;

/**
 * Extracts the top keywords from selected fields of a record by running the
 * field text through a {@link BasicSegmentor} and serialising the leading
 * entries of the segmentor's keyword list as JSON.
 *
 * <p>Typical usage: call {@link #init} once, call {@link #doLocRecognition}
 * for every record, then call {@link #uninit} to release the dictionaries
 * that {@code init} acquired.
 */
public class HLKeywordProcess {

	/** Suffix appended to a source field name to build its keyword output field name. */
	private static final String FIELD_SUFFIX_KEYWORD = "_keywords";

	/** Not referenced anywhere in this class; kept as declared. */
	private static final String FILENAME_KEYWORD = "keyword.txt";

	/** Names of the fields whose content is segmented. */
	private Set<String> dealFieldSet = new HashSet<>();
	private BasicSegmentor segmentor = new BasicSegmentor();
	private SegOption option = new SegOption();
	/** Maximum number of keywords emitted per field; 0 disables extraction. */
	private int count = 0;

	/** True once a user dictionary built from custom keywords has been loaded. */
	private boolean usrFillDict = false;
	/** True once any dictionary has been acquired and must be released in {@link #uninit}. */
	private boolean isUseCoreDict = false;

	/**
	 * Configures the processor and loads the dictionaries used for segmentation.
	 *
	 * @param keywords  custom keywords; when empty the segmentor's static dictionary is
	 *                  used, otherwise a user dictionary is built from them via
	 *                  {@code CommonTools.buildUsrDict} and loaded on top of the static
	 *                  core dictionary
	 * @param fieldSet  names of the fields to extract keywords from
	 * @param cnt       maximum number of keywords to emit per field
	 * @param dictPath  passed through to {@code CommonTools.buildUsrDict}
	 * @param savePath  passed through to {@code CommonTools.buildUsrDict}
	 * @param id        passed through to {@code CommonTools.buildUsrDict}
	 * @param errBuffer receives a short message describing the failure when this
	 *                  method returns {@code false}
	 * @return {@code true} when there is nothing to do ({@code fieldSet} empty or
	 *         {@code cnt == 0}) or when the dictionaries were loaded; {@code false}
	 *         on any dictionary build/load failure
	 */
	public boolean init(String keywords, 
			Set<String> fieldSet, 
			int cnt, 
			String dictPath, 
			String savePath, 
			String id,
			StringBuffer errBuffer){
		
		boolean ret = false;
		
		// Nothing to extract: succeed without touching any dictionary.
		if(fieldSet.isEmpty() || cnt == 0){
			return true;
		}
		
		dealFieldSet.addAll(fieldSet);
		count = cnt;
		
		// Load the dictionary: the static one, or the static core one plus a custom user dictionary.
		if(StringUtils.isEmpty(keywords)){
			ret = segmentor.useStaticDictionary();
			if(!ret){
				errBuffer.append("seg load keyword dict err");
			}else{
				isUseCoreDict = true;
			}
		}else{
			String dictfile = CommonTools.buildUsrDict(dictPath, savePath, id, keywords, null);
			if(dictfile == null){
				errBuffer.append("build keyword dict err");
				return ret;
			}

			ret = segmentor.useStaticCoreDictionary();
			if(!ret){
				errBuffer.append("seg load static core dict err");
				return ret;
			}
			
			ret = segmentor.loadUserDict(dictfile);
			if(!ret){
				// Roll back the core dictionary acquired just above.
				segmentor.unuseStaticCoreDictionary();
				errBuffer.append("load keyword dict err");
			}else{
				usrFillDict = true;
				isUseCoreDict = true;
			}
			
			// The generated dictionary file is removed after the load attempt (best effort).
			new File(dictfile).delete();
		}
		option.grainSize = SegGrain.LARGE;
		option.mergeDateTimeWords = true;

		return ret;
		
	}
	
	/**
	 * Releases the dictionaries acquired by {@link #init}. Does nothing when no
	 * dictionary is currently held, so repeated calls are harmless.
	 */
	public void uninit(){
		if(isUseCoreDict){
			if(usrFillDict){
				segmentor.unloadUserDict();
				segmentor.unuseStaticCoreDictionary();
			}else{
				segmentor.unuseStaticDictionary();
			}
			isUseCoreDict = false;
			// Reset as well, otherwise a later init() without custom keywords would
			// make the next uninit() take the user-dictionary release path.
			usrFillDict = false;
		}
	}
	
	/**
	 * Extracts keywords for every configured field present in {@code dataMap} and
	 * stores them back into the map under {@code <field>_keywords} as a JSON string
	 * of the form {@code {"data_type":"keyword","datas":[...]}}.
	 *
	 * @param dataMap record to process; modified in place
	 * @return {@code true} if keywords were stored for at least one field;
	 *         {@code false} when extraction is disabled, {@code dataMap} is
	 *         {@code null}, or no field yielded keywords
	 */
	public boolean doLocRecognition(Map<String, String> dataMap) {

		boolean ret = false;
		
		if(count == 0 || dataMap == null){
			return false;
		}
		
		// Check each configured field of the record to decide whether it needs segmentation.
		for (String field : dealFieldSet) {
			String content = dataMap.get(field);
			if (StringUtils.isEmpty(content)) {
				continue;
			}

			JSONArray array = getKeywords(field, content);
			if(array.isEmpty()){
				continue;
			}
			
			ret = true;
			
			JSONObject jsonObject = new JSONObject();
			jsonObject.put("data_type", "keyword");
			jsonObject.put("datas", array);
			
			dataMap.put(field.concat(FIELD_SUFFIX_KEYWORD), jsonObject.toJSONString());
		}
		
		return ret;

	}
	
	/**
	 * Segments {@code content} and converts at most {@link #count} leading entries
	 * of the segmentor's keyword list into JSON objects carrying {@code str},
	 * {@code weight}, {@code count} and {@code natureFlag}.
	 *
	 * @param field   name of the field being processed (currently unused)
	 * @param content text to segment
	 * @return the keyword array; empty when the segmentor yields no keywords
	 */
	private JSONArray getKeywords(String field, String content){
		JSONArray array = new JSONArray();
		
		SegResult segResult = segmentor.segment(content, option);
		if(segResult == null){
			return array;
		}
		
		List<Token> keywordList = segResult.getKeywordsList();
		if(keywordList == null || keywordList.isEmpty()){
			return array;
		}
		
		int cnt = Math.min(count, keywordList.size());
		for(int i = 0; i < cnt; i++){
			Token token = keywordList.get(i);
			JSONObject jsonObject = new JSONObject();
			jsonObject.put("str", token.wordStr);
			jsonObject.put("weight", token.weight);
			jsonObject.put("count", token.frequency);
			jsonObject.put("natureFlag", token.natureFlag);
			array.add(jsonObject);
		}
		
		return array;
	}

	/**
	 * Builds the keyword output field names for a comma-separated list of source
	 * field names by trimming each name and appending {@code _keywords}.
	 *
	 * @param sourceFields comma-separated source field names; may be {@code null}
	 * @return the output field names in input order; empty when
	 *         {@code sourceFields} is {@code null}. Blank names are skipped so no
	 *         bare {@code "_keywords"} entry is produced.
	 */
	public List<String> genKeywordField(String sourceFields){
		List<String> fieldsList = new ArrayList<>();
		if(sourceFields == null){
			return fieldsList;
		}
		for(String field: sourceFields.split(",")) {
			String name = field.trim();
			if(name.isEmpty()){
				continue;
			}
			fieldsList.add(name.concat(FIELD_SUFFIX_KEYWORD));
		}
		return fieldsList;
	}

}
